First let’s load some packages:

library(phenocamapi)
## Loading required package: data.table
## Loading required package: rjson
## Loading required package: RCurl
## Loading required package: bitops
library(plotly)
## Loading required package: ggplot2
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
library(phenocamr)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:data.table':
## 
##     between, first, last
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

Let’s start by pulling in a list of sites that have AmeriFlux towers and PhenoCams via the phenocamapi R package:

phenos=get_phenos()
#and let's check the column names:
colnames(phenos)
##  [1] "site"                      "lat"                      
##  [3] "lon"                       "elev"                     
##  [5] "active"                    "utc_offset"               
##  [7] "date_first"                "date_last"                
##  [9] "infrared"                  "contact1"                 
## [11] "contact2"                  "site_description"         
## [13] "site_type"                 "group"                    
## [15] "camera_description"        "camera_orientation"       
## [17] "flux_data"                 "flux_networks"            
## [19] "flux_sitenames"            "dominant_species"         
## [21] "primary_veg_type"          "secondary_veg_type"       
## [23] "site_meteorology"          "MAT_site"                 
## [25] "MAP_site"                  "MAT_daymet"               
## [27] "MAP_daymet"                "MAT_worldclim"            
## [29] "MAP_worldclim"             "koeppen_geiger"           
## [31] "ecoregion"                 "landcover_igbp"           
## [33] "dataset_version1"          "site_acknowledgements"    
## [35] "modified"                  "flux_networks_name"       
## [37] "flux_networks_url"         "flux_networks_description"

You can more about the phenocamapi R package from here.

Notice that ‘flux_data’ (true-false), ‘flux_networks’, and ‘flux_sitenames’ are all variables that you can either filter by or retain the info of. Let’s grab all Ameriflux sites where we have the sitename stored:

phenos=phenos%>%
  filter(phenos$flux_data=='TRUE')%>%
filter(flux_networks_name=='AMERIFLUX')%>%
  filter(flux_sitenames!='NA')%>%
  select(flux_sitenames, site, date_first, date_last, site_description, primary_veg_type, koeppen_geiger)
head(phenos)
##                       flux_sitenames                 site date_first
## 1                             US-NC4       alligatorriver 2012-05-03
## 2 US-Br1: Brooks Field Site 10- Ames          arsbrooks10 2019-07-10
## 3 US-Br3: Brooks Field Site 11- Ames          arsbrooks11 2019-07-01
## 4                             US-Rws arsgreatbasinltar098 2017-05-18
## 5                             US-Rms arsgreatbasinltar177 2018-10-17
## 6                             US-SP1           austincary 2016-01-11
##    date_last
## 1 2019-09-15
## 2 2019-09-15
## 3 2019-09-15
## 4 2019-09-11
## 5 2019-09-12
## 6 2019-09-15
##                                                                                site_description
## 1                                      Alligator River National Wildlife Refuge, North Carolina
## 2                                                              Brooks Field site 10, Ames, Iowa
## 3                                                              Brooks Field site 11, Ames, Iowa
## 4                                ARS, Great Basin LTAR, ARTRW8 community, Reynolds Creek, Idaho
## 5                           ARS, Great Basin LTAR, ARTRV/SYORU community, Reynolds Creek, Idaho
## 6 Austin Cary Forest, School of Forest Resources & Conservation, University of Florida, Florida
##   primary_veg_type koeppen_geiger
## 1               DB            Cfa
## 2               AG            Dfa
## 3               AG            Dfa
## 4               SH            BSk
## 5               SH            BSk
## 6               EN            Cfa

#external data from phenocamapi package Now let’s select two combination phenocam-flux tower sites from different plant functional types to explore (e.g. one grassland site and one evergreen needleleaf site)

#example
GrassSites=phenos%>%
  filter(phenos$primary_veg_type=='GR')
head(GrassSites) #just viewing the top 6 sites in the dataframe
##         flux_sitenames         site date_first  date_last
## 1 US-MTB (forthcoming)      bozeman 2015-08-16 2019-09-15
## 2               US-FR1 freemangrass 2012-03-13 2014-02-27
## 3               US-KFS       kansas 2012-03-17 2019-09-15
## 4               US-Wkg      kendall 2012-07-06 2019-09-15
## 5               US-Kon        konza 2012-03-17 2019-09-15
## 6               CA-Let   lethbridge 2011-12-07 2019-09-15
##                                                           site_description
## 1                   Bangtail Study Area, Montana State University, Montana
## 2 Grassland Site, Texas State University, Freeman Ranch, San Marcos, Texas
## 3                           KU Field Station, University of Kansas, Kansas
## 4                                               Kendall Grassland, Arizona
## 5        Konza Prairie Biological Station, Kansas State University, Kansas
## 6                 Lethbridge Grassland Ecosystem Site, Lethbridge, Alberta
##   primary_veg_type koeppen_geiger
## 1               GR            Dfb
## 2               GR            Cfa
## 3               GR            Cfa
## 4               GR            BSk
## 5               GR            Cfa
## 6               GR            Dfb
DecidSites=phenos%>%
  filter(phenos$primary_veg_type=='DB')
head(DecidSites)
##   flux_sitenames           site date_first  date_last
## 1         US-NC4 alligatorriver 2012-05-03 2019-09-15
## 2         US-Bar       bartlett 2005-10-04 2016-06-09
## 3         US-Bar     bartlettir 2008-04-18 2019-09-15
## 4         US-Ha1           bbc1 2015-05-04 2019-09-15
## 5         US-Ha1           bbc2 2015-05-04 2019-09-15
## 6         US-Bar           bbc7 2015-05-15 2019-09-15
##                                                   site_description
## 1         Alligator River National Wildlife Refuge, North Carolina
## 2            Bartlett Experimental Forest, Bartlett, New Hampshire
## 3            Bartlett Experimental Forest, Bartlett, New Hampshire
## 4 Hardwood Walk-up Tower, Harvard Forest, Petersham, Massachusetts
## 5              LPH Tower, Harvard Forest, Petersham, Massachusetts
## 6            Bartlett Experimental Forest, Bartlett, New Hampshire
##   primary_veg_type koeppen_geiger
## 1               DB            Cfa
## 2               DB            Dfb
## 3               DB            Dfb
## 4               DB            Dfb
## 5               DB            Dfb
## 6               DB            Dfb
FirstGRSite=GrassSites[5, ] #I randomly chose a site in the table
FirstGRSite
##   flux_sitenames  site date_first  date_last
## 5         US-Kon konza 2012-03-17 2019-09-15
##                                                    site_description
## 5 Konza Prairie Biological Station, Kansas State University, Kansas
##   primary_veg_type koeppen_geiger
## 5               GR            Cfa
SecondDBSite=DecidSites[3,]
SecondDBSite
##   flux_sitenames       site date_first  date_last
## 3         US-Bar bartlettir 2008-04-18 2019-09-15
##                                        site_description primary_veg_type
## 3 Bartlett Experimental Forest, Bartlett, New Hampshire               DB
##   koeppen_geiger
## 3            Dfb

Chose your own sites and build out your code chunk here:

 #Copy and past the code above to select sites that you are interested in
#Drop your code here

Koen Huffkens developed the phenocamr package, which streamlines access to quality controlled data. We will now use this package to download and process site based data according to a standardized methodology.

A full description of the methodology is provided in Scientific Data: Tracking vegetation phenology across diverse North American biomes using PhenoCam imagery (Richardson et al. 2018).

#uncomment if you need to install via devtools
#if(!require(devtools)){install.package(devtools)}
#devtools::install_github("khufkens/phenocamr")
library(phenocamr)

Use the dataframe you built to feed the phenocamr package. Note: you can choose either a daily or 3 day product

phenocamr::download_phenocam(
  frequency = 3,
  veg_type = FirstGRSite$primary_veg_type,
  roi_id = 1000,
  site = paste0(FirstGRSite$site, '$'),
  phenophase = TRUE,
  out_dir = "data"
  )
## Downloading: konza_GR_1000_3day.csv
## -- Flagging outliers!
## -- Smoothing time series!
## -- Estimating transition dates!
phenocamr::download_phenocam(
  frequency = 3,
  veg_type = SecondDBSite$primary_veg_type,
  roi_id = 1000,
  site = paste0(SecondDBSite$site, '$'),
  phenophase = TRUE,
  out_dir = "data"
  )
## Downloading: bartlettir_DB_1000_3day.csv
## -- Flagging outliers!
## -- Smoothing time series!
## -- Estimating transition dates!

Now look in your working directory. You have data! Read it in:

# load the time series data but replace the csv filename with whatever you downloaded
df <- read.table("data/konza_GR_1000_3day.csv", header = TRUE, sep = ",")

# read in the transition date file
td <- read.table("data/konza_GR_1000_3day_transition_dates.csv",
                 header = TRUE,
                 sep = ",")

Let’s take a look at the data:

p = plot_ly() %>%
  add_trace(
  data = df,
  x = ~ as.Date(date),
  y = ~ smooth_gcc_90,
  name = 'Smoothed GCC',
  showlegend = TRUE,
  type = 'scatter',
  mode = 'line'
) %>% add_markers(
  data=df,
  x ~ as.Date(date),
  y = ~gcc_90,
  name = 'GCC',
  type = 'scatter',
  color ='#07A4B5', 
  opacity=.5
)
p
## Warning: Ignoring 2192 observations
## Warning in RColorBrewer::brewer.pal(N, "Set2"): minimal value for n is 3, returning requested palette with 3 different levels

## Warning in RColorBrewer::brewer.pal(N, "Set2"): minimal value for n is 3, returning requested palette with 3 different levels

What patterns do you notice? How would we go about determining say the start of spring? (SOS)

###Threshold values

Let’s subset the transition date (td) for each year when 25% of the greenness amplitude (of the 90^th) percentile is reached (threshold_25).

# select the rising (spring dates) for 25% threshold of Gcc 90
spring <- td[td$direction == "rising" & td$gcc_value == "gcc_90",]

Now let’s create a simple plot_ly line graph of the smooth Green Chromatic Coordinate (Gcc) and add points for transition dates:

p = plot_ly() %>%
  add_trace(
  data = df,
  x = ~ as.Date(date),
  y = ~ smooth_gcc_90,
  name = 'PhenoCam GCC',
  showlegend = TRUE,
  type = 'scatter',
  mode = 'line'
) %>% add_markers(
  data= spring, 
  x = ~ as.Date(spring$transition_25, origin = "1970-01-01"),
  y = ~ spring$threshold_25,
  type = 'scatter',
  mode = 'marker',
  name = 'Spring Dates')
                
p

Now we can see the transition date for each year of interest and the annual patterns of greenness.

However, if you want more control over the parameters used during processing, you can run through the three default processing steps as implemented in download_phenocam() and set parameters manually.

Of particular interest is the option to specify your own threshold used in determining transition dates.

What would be a reasonable threshold for peak greenness? Or autumn onset? Look at the ts dataset and phenocamr package and come up with a threshold. Use the same code to plot it here:

#print('code here')
#some hint code
#what does 'rising' versus 'falling' denote?
#what threshold should you choose?
#td <- phenophases("konza_GR_1000_3day.csv",
#            internal = TRUE,
#            upper_thresh = 0.8)
fall <- td[td$direction == "falling" & td$gcc_value == "gcc_90",]
#Now generate a fall dataframe, what metrics should you use?

Let’s load in a function to make plotting smoother. I’m dropped it here in the markdown so that you can edit it and re-run it as you see fit:

gcc_plot = function(gcc, spring, fall){
  unix = "1970-01-01"

  p = plot_ly(
    data = gcc,
    x = ~ date,
    y = ~ gcc_90,
    showlegend = FALSE,
    type = 'scatter',
    mode = 'markers'
  ) %>%
    add_trace(
      y = ~ smooth_gcc_90,
      mode = "lines",
      line = list(width = 2, color = "rgb(120,120,120)"),
      name = "Gcc loess fit",
      showlegend = TRUE
    ) %>%
    # SOS spring
    # 10%
    add_trace(
      data = spring,
      x = ~ as.Date(transition_10),
      y = ~ threshold_10,
      mode = "markers",
      type = "scatter",
      marker = list(color = "#7FFF00", symbol = "circle"),
      name = "SOS (10%)",
      showlegend = TRUE
    ) %>%
    add_segments(x = ~ as.Date(transition_10_lower_ci),
                 xend = ~ as.Date(transition_10_upper_ci),
                 # y = ~ 0,
                 # yend = ~ 1,
                 y = ~ threshold_10,
                 yend = ~ threshold_10,
                 line = list(color = "#7FFF00"),
                 name = "SOS (10%) - CI"
    ) %>%
    # 25 %
    add_trace(
      x = ~ as.Date(transition_25),
      y = ~ threshold_25,
      mode = "markers",
      type = "scatter",
      marker = list(color = "#66CD00", symbol = "square"),
      showlegend = TRUE,
      name = "SOS (25%)"
    ) %>%
    add_segments(x = ~ as.Date(transition_25_lower_ci),
                 xend = ~ as.Date(transition_25_upper_ci),
                 y = ~ threshold_25,
                 yend = ~ threshold_25,
                 line = list(color = "#66CD00"),
                 name = "SOS (25%) - CI"
    ) %>%
    # 50 %
    add_trace(
      x = ~ as.Date(transition_50),
      y = ~ threshold_50,
      mode = "markers",
      type = "scatter",
      marker = list(color = "#458B00", symbol = "diamond"),
      showlegend = TRUE,
      name = "SOS (50%)"
    ) %>%
    add_segments(x = ~ as.Date(transition_50_lower_ci),
                 xend = ~ as.Date(transition_50_upper_ci),
                 y = ~ threshold_50,
                 yend = ~ threshold_50,
                 line = list(color = "#458B00"),
                 name = "SOS (50%) - CI"
    ) %>%
    
    # EOS fall
    # 50%
    add_trace(
      data = fall,
      x = ~ as.Date(transition_50),
      y = ~ threshold_50,
      mode = "markers",
      type = "scatter",
      marker = list(color = "#FFB90F", symbol = "diamond"),
      showlegend = TRUE,
      name = "EOS (50%)"
    ) %>%
    add_segments(x = ~ as.Date(transition_50_lower_ci),
                 xend = ~ as.Date(transition_50_upper_ci),
                 y = ~ threshold_50,
                 yend = ~ threshold_50,
                 line = list(color = "#FFB90F"),
                 name = "EOS (50%) - CI"
    ) %>%
    # 25 %
    add_trace(
      x = ~ as.Date(transition_25),
      y = ~ threshold_25,
      mode = "markers",
      type = "scatter",
      marker = list(color = "#CD950C", symbol = "square"),
      showlegend = TRUE,
      name = "EOS (25%)"
    ) %>%
    add_segments(x = ~ as.Date(transition_25_lower_ci),
                 xend = ~ as.Date(transition_25_upper_ci),
                 y = ~ threshold_25,
                 yend = ~ threshold_25,
                 line = list(color = "#CD950C"),
                 name = "EOS (25%) - CI"
    ) %>%
    # 10 %
    add_trace(
      x = ~ as.Date(transition_10),
      y = ~ threshold_10,
      mode = "markers",
      marker = list(color = "#8B6508", symbol = "circle"),
      showlegend = TRUE,
      name = "EOS (10%)"
    ) %>%
    add_segments(x = ~ as.Date(transition_10_lower_ci),
                 xend = ~ as.Date(transition_10_upper_ci),
                 y = ~ threshold_10,
                 yend = ~ threshold_10,
                 line = list(color = "#8B6508"),
                 name = "EOS (10%) - CI"
    )
  return (p)
}
gr1 = gcc_plot(df, spring, fall)
gr1
## Warning: Ignoring 2192 observations
## Warning: Can't display both discrete & non-discrete data on same axis

Now let’s look at inter-annual variation in spring onset. What is the difference in 25% greenness onset for your first site? #hint, look at the spring dataframe you just generated

#some hints to get you started
yr=spring$transition_25
yr=as.Date(yr)
yr
## [1] "2012-04-24" "2013-05-04" "2014-04-23" "2015-04-22" "2016-05-02"
## [6] "2017-04-21" "2018-05-01" "2019-04-30"
#pull out spring transition dates into separate columns
dates_split <- data.frame(date = yr,
                 year = as.numeric(format(yr, format = "%Y")),
                 month = as.numeric(format(yr, format = "%m")),
                 day = as.numeric(format(yr, format = "%d")))

#or track DOY changes
doy=as.Date(yr, format='%d%m%Y')
doy=lubridate::yday(doy)
doy
## [1] 115 124 113 112 123 111 121 120
doy=as.data.frame(doy)
spring_variation=cbind(yr, doy)
 p = plot_ly(
    data = spring_variation,
    x = ~ yr,
    y = ~ doy,
    showlegend = FALSE,
    type = 'scatter',
    mode = 'markers', 
    name = "Year"
  ) %>%
    # 25 %
    add_trace(
      x = ~ yr,
      mode = "lines",
      line = list(width = 2, color = "rgb(120,120,120)"),
      showlegend = FALSE
      
    )
p

*** ###Comparing phenology of the same vegetation cover but across climate space

As Dr. Richardson mentioned this morning in his introduction lecture, the same plant functional types (e.g. grasslands) can have very different phenological cycles. Let’s pick two phenocam grassland sites: one from a tropical climate (kamuela), and one from an arid climate #edit

SecondGRSite=GrassSites[4,]
phenocamr::download_phenocam(
  frequency = 3,
  veg_type = SecondGRSite$primary_veg_type,
  roi_id = 1000,
  site = paste0(SecondGRSite$site, '$'),
  phenophase = TRUE,
  out_dir = "data"
  )
## Downloading: kendall_GR_1000_3day.csv
## -- Flagging outliers!
## -- Smoothing time series!
## -- Estimating transition dates!

Now use the code you’ve generated above to pull in data from those sites:

#code here
# load the time series data but replace the csv filename with whatever you downloaded
df <- read.table("data/kendall_GR_1000_3day.csv", header = TRUE, sep = ",")

# read in the transition date file
td <- read.table("data/kendall_GR_1000_3day_transition_dates.csv",
                 header = TRUE,
                 sep = ",")
spring <- td[td$direction == "rising" & td$gcc_value == "gcc_90",]
fall <- td[td$direction == "falling" & td$gcc_value == "gcc_90",]
gr2 = gcc_plot(df, spring, fall)
gr2
## Warning: Ignoring 1938 observations
## Warning: Can't display both discrete & non-discrete data on same axis

Now let’s create a subplot of your grasslands to compare phenology, some hint code below:

#some hint code for subplotting in plot_ly:
p <- subplot(gr1, gr2, nrows=2)
## Warning: Ignoring 2192 observations
## Warning: Can't display both discrete & non-discrete data on same axis
## Warning: Ignoring 1938 observations
## Warning: Can't display both discrete & non-discrete data on same axis
p
## Warning: Can't display both discrete & non-discrete data on same axis

## Warning: Can't display both discrete & non-discrete data on same axis

Once you have a subplot of grassland phenology across 2 climates answer the following questions here in the markdown: 1. What seasonal patterns do you see? 2. Do you think you set your thresholds correctly for transition dates/phenophases? How might that very as a function of climate?


##Flux Data Integration

Finally, let’s pull in some cleaned AmeriFlux data:

fluxts <- read.csv("data/example_flux/Bartlett_daily_gf_reformat.csv")
head(fluxts)
##    X       date year month day   GPP_U50
## 1  1 2004-01-01 2004     1   1 0.1865041
## 2 15 2004-01-02 2004     1   2 0.1011034
## 3 29 2004-01-03 2004     1   3 0.3155800
## 4 43 2004-01-04 2004     1   4 0.2455534
## 5 57 2004-01-05 2004     1   5 0.1761416
## 6 71 2004-01-06 2004     1   6 0.1274941

Let’s read in the phenocam data for that fluxtower and plot it:

# load the time series data but replace the csv filename with whatever you downloaded
df <- read.table("data/bartlettir_DB_1000_3day.csv", header = TRUE, sep = ",")

# read in the transition date file
td <- read.table("data/bartlettir_DB_1000_3day_transition_dates.csv",
                 header = TRUE,
                 sep = ",")
spring <- td[td$direction == "rising" & td$gcc_value == "gcc_90",]
fall <- td[td$direction == "falling" & td$gcc_value == "gcc_90",]
gcc_p = gcc_plot(df, spring, fall)
gcc_p
## Warning: Ignoring 2312 observations
## Warning: Can't display both discrete & non-discrete data on same axis

Now let’s look at the flux data:

p = plot_ly(
    data = fluxts,
    x = ~ date,
    y = ~ GPP_U50,
    showlegend = FALSE,
    type = 'scatter',
    mode = 'markers'
  ) 
p

We’ll need to filter the phenocam data to match up with the fluxtower subset:

sum(is.element(df$date, as.factor(fluxts$date)))
## [1] 3346

Now on your own use this indexing code above to filter the phenocam data to overlap with the fluxtower data

df=df[is.element(df$date, as.factor(fluxts$date)), ]
fluxts=fluxts[is.element(as.factor(fluxts$date),df$date), ]
p1 = plot_ly(
    data = df,
    x = ~ date,
    y = ~ gcc_90,
    showlegend = FALSE,
    type = 'scatter',
    mode = 'markers'
  ) %>%
    add_trace(
      y = ~ smooth_gcc_90,
      mode = "lines",
      line = list(width = 2, color = "rgb(120,120,120)"),
      name = "Gcc loess fit",
      showlegend = TRUE
    )
p1
## Warning: Ignoring 2312 observations
#some hint code for subplotting in plot_ly:
p <- subplot(p, p1, nrows=2)
## Warning: Ignoring 2312 observations
p

You’ve now been introduced to the basics of working with the PhenoCam API, using the PhenoR package, and integrating PhenoCam and AmeriFlux data. Good luck conducting your own analyses!